Fit VBGE models to back-calculated length-at-age

Author

Max Lindmark, Jan Ohlberger, Anna Gårdmark

Published

January 15, 2024

Load libraries

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.2     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.2     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidylog)

Attaching package: 'tidylog'

The following objects are masked from 'package:dplyr':

    add_count, add_tally, anti_join, count, distinct, distinct_all,
    distinct_at, distinct_if, filter, filter_all, filter_at, filter_if,
    full_join, group_by, group_by_all, group_by_at, group_by_if,
    inner_join, left_join, mutate, mutate_all, mutate_at, mutate_if,
    relocate, rename, rename_all, rename_at, rename_if, rename_with,
    right_join, sample_frac, sample_n, select, select_all, select_at,
    select_if, semi_join, slice, slice_head, slice_max, slice_min,
    slice_sample, slice_tail, summarise, summarise_all, summarise_at,
    summarise_if, summarize, summarize_all, summarize_at, summarize_if,
    tally, top_frac, top_n, transmute, transmute_all, transmute_at,
    transmute_if, ungroup

The following objects are masked from 'package:tidyr':

    drop_na, fill, gather, pivot_longer, pivot_wider, replace_na,
    spread, uncount

The following object is masked from 'package:stats':

    filter
library(broom)
library(RColorBrewer)
library(viridis)
Loading required package: viridisLite
library(minpack.lm)
library(patchwork)
library(ggtext)
library(brms)
Loading required package: Rcpp
Loading 'brms' package (version 2.20.4). Useful instructions
can be found by typing help('brms'). A more detailed introduction
to the package is available through vignette('brms_overview').

Attaching package: 'brms'

The following object is masked from 'package:stats':

    ar
library(modelr)

Attaching package: 'modelr'

The following object is masked from 'package:broom':

    bootstrap
library(tidybayes)

Attaching package: 'tidybayes'

The following objects are masked from 'package:brms':

    dstudent_t, pstudent_t, qstudent_t, rstudent_t
library(ggridges)

Attaching package: 'ggridges'

The following objects are masked from 'package:tidybayes':

    scale_point_color_continuous, scale_point_color_discrete,
    scale_point_colour_continuous, scale_point_colour_discrete,
    scale_point_fill_continuous, scale_point_fill_discrete,
    scale_point_size_continuous
# devtools::install_github("seananderson/ggsidekick") ## not on CRAN 
library(ggsidekick); theme_set(theme_sleek())

# Load functions
home <- here::here()
fxn <- list.files(paste0(home, "/R/functions"))
invisible(sapply(FUN = source, paste0(home, "/R/functions/", fxn)))

Load cache

# qwraps2::lazyload_cache_dir(path = paste0(home, "/R/analyze-data/02-fit-vbge_cache/html"))

Read and trim data

d <- #read_csv(paste0(home, "/data/for-analysis/dat.csv")) %>% 
  read_csv("https://raw.githubusercontent.com/maxlindmark/perch-growth/master/data/for-analysis/dat.csv") %>% 
  filter(age_ring == "Y", # use only length-at-age by filtering on age_ring
         !area %in% c("VN", "TH")) 
Rows: 338460 Columns: 12
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (6): age_ring, area, gear, ID, sex, keep
dbl (6): length_mm, age_bc, age_catch, catch_year, cohort, final_length

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
filter: removed 43,886 rows (13%), 294,574 rows remaining
# Minimum number of observations per area and cohort
d$area_cohort <- as.factor(paste(d$area, d$cohort))

d <- d %>%
  group_by(area_cohort) %>% 
  filter(n() > 100)
group_by: one grouping variable (area_cohort)
filter (grouped): removed 2,637 rows (1%), 291,937 rows remaining
# Minimum number of observations per area, cohort, and age
d$area_cohort_age <- as.factor(paste(d$area, d$cohort, d$age_bc))

d <- d %>%
  group_by(area_cohort_age) %>% 
  filter(n() > 10)
group_by: one grouping variable (area_cohort_age)
filter (grouped): removed 3,521 rows (1%), 288,416 rows remaining
# Minimum number of cohorts in a given area
cnt <- d %>%
  group_by(area) %>%
  summarise(n = n_distinct(cohort)) %>%
  filter(n >= 10)
group_by: one grouping variable (area)
summarise: now 10 rows and 2 columns, ungrouped
filter: no rows removed
d <- d[d$area %in% cnt$area, ]

# Plot cleaned data: jittered back-calculated length-at-age, faceted by area
ggplot(d, aes(age_bc, length_mm)) +
  geom_jitter(size = 0.1, height = 0, alpha = 0.1) +
  scale_x_continuous(breaks = seq(20)) +
  theme(axis.text.x = element_text(angle = 0)) +
  theme(axis.text = element_text(size = 12), axis.title = element_text(size = 15)) +
  labs(x = "Age", y = "Length (mm)") +
  guides(color = "none") + 
  # spell out "scales" in full; "scale" only works via partial argument matching
  facet_wrap(~area, scales = "free_x")

# Longitude and latitude attributes for each area
area <- c("BS", "BT", "FB", "FM", "HO", "JM", "MU", "RA", "SI_EK", "SI_HA", "TH")
nareas <- length(area)
lat <- c(60, 60.4, 60.3, 60.5, 63.7, 58, 59, 65.9, 57.3, 57.4, 56.1)
lon <- c(21.5, 18.1, 19.5, 18, 20.9, 16.8, 18.1, 22.3, 16.6, 16.7, 15.9)
# Build the data frame directly: wrapping cbind() in data.frame() coerces every
# column to character, which then forces the lat/lon back-conversion to numeric
area_attr <- data.frame(area = area, lat = lat, lon = lon)
mutate_at: converted 'lat' from character to double (0 new NA)
           converted 'lon' from character to double (0 new NA)

Fit VBGE models

# Get individual growth parameters (functions: VBGF/Gompertz,nls_out,fit_nls)
# Fit one growth curve per individual ID; per the note below (L173), rows come
# back with NA parameters when nls fails to converge or the individual has
# fewer ages than min_nage — TODO confirm against fit_nls()/nls_out() source
IVBG <- d %>%
  group_by(ID) %>%
  summarize(nls_out(fit_nls(length_mm, age_bc, min_nage = 5, model = "VBGF")))
group_by: one grouping variable (ID)
summarize: now 75,245 rows and 5 columns, ungrouped
IVBG <- IVBG %>% drop_na(k) # The NAs are because the model didn't converge or because they were below the threshold age
drop_na: removed 51,640 rows (69%), 23,605 rows remaining
IVBG <- IVBG %>%
  mutate(k_lwr = k - 1.96*k_se,
         k_upr = k + 1.96*k_se,
         linf_lwr = linf - 1.96*linf_se,
         linf_upr = linf + 1.96*linf_se,
         row_id = row_number())
mutate: new variable 'k_lwr' (double) with 23,603 unique values and 0% NA
        new variable 'k_upr' (double) with 23,603 unique values and 0% NA
        new variable 'linf_lwr' (double) with 23,603 unique values and 0% NA
        new variable 'linf_upr' (double) with 23,603 unique values and 0% NA
        new variable 'row_id' (integer) with 23,605 unique values and 0% NA
# Plot all K's
IVBG %>%
  #filter(row_id < 5000) %>%
  ggplot(aes(row_id, k, ymin = k_lwr, ymax = k_upr)) +
  geom_point(alpha = 0.2) +
  geom_errorbar(alpha = 0.2) +
  NULL

# Plot all L_inf's
IVBG %>%
  #filter(row_id < 5000) %>%
  ggplot(aes(row_id, linf, ymin = linf_lwr, ymax = linf_upr)) +
  geom_point(alpha = 0.2) +
  geom_errorbar(alpha = 0.2) +
  NULL

# We can also consider removing individuals where the SE of k is larger than the k estimate itself
IVBG %>%
  #mutate(keep = ifelse(k > quantile(k_se, probs = 0.95), "N", "Y")) %>%
  mutate(keep = ifelse(k < k_se, "N", "Y")) %>%
  #filter(row_id < 10000) %>%
  ggplot(aes(row_id, k, ymin = k_lwr, ymax = k_upr, color = keep)) +
  geom_point(alpha = 0.5) +
  facet_wrap(~keep, ncol = 1) +
  geom_errorbar(alpha = 0.5) +
  NULL
mutate: new variable 'keep' (character) with 2 unique values and 0% NA

# Add back cohort and area variables
IVBG <- IVBG %>% 
  left_join(d %>% select(ID, area_cohort)) %>% 
  separate(area_cohort, into = c("area", "cohort"), remove = TRUE, sep = " ") %>% 
  mutate(cohort = as.numeric(cohort))
Adding missing grouping variables: `area_cohort_age`
select: dropped 11 variables (length_mm, age_bc, age_catch, age_ring, area, …)
Joining with `by = join_by(ID)`
left_join: added 2 columns (area_cohort_age, area_cohort)
> rows only in x 0
> rows only in y (146,393)
> matched rows 142,023 (includes duplicates)
> =========
> rows total 142,023
mutate: converted 'cohort' from character to double (0 new NA)
# Compare how the means and quantiles differ depending on this filtering
IVBG_filter <- IVBG %>%
  drop_na(k_se) %>%
  #filter(k_se < quantile(k_se, probs = 0.95)) %>% 
  filter(k_se < k)
drop_na: no rows removed
filter: removed 4,192 rows (3%), 137,831 rows remaining
# Summarize growth coefficients by cohort and area
# (mean plus median and 5%/95% quantiles of k; median of L-infinity)
VBG <- IVBG %>%
  filter(k_se < k) %>% # new! drop individuals whose k SE exceeds the estimate
  group_by(cohort, area) %>%
  summarize(k_mean = mean(k, na.rm = TRUE),
            k_median = quantile(k, probs = 0.5, na.rm = TRUE),
            linf_median = quantile(linf, probs = 0.5, na.rm = TRUE),
            k_lwr = quantile(k, probs = 0.05, na.rm = TRUE),
            k_upr = quantile(k, probs = 0.95, na.rm = TRUE)) %>% 
  ungroup()
filter: removed 4,192 rows (3%), 137,831 rows remaining
group_by: 2 grouping variables (cohort, area)
summarize: now 306 rows and 7 columns, one group variable remaining (cohort)
ungroup: no grouping variables
# Same cohort/area summary for the pre-filtered parameter set
VBG_filter <- IVBG_filter %>%
  group_by(cohort, area) %>%
  summarize(k_mean = mean(k, na.rm = TRUE),
            k_median = quantile(k, probs = 0.5, na.rm = TRUE),
            k_lwr = quantile(k, probs = 0.05, na.rm = TRUE),
            k_upr = quantile(k, probs = 0.95, na.rm = TRUE)) %>% 
  ungroup()
group_by: 2 grouping variables (cohort, area)
summarize: now 306 rows and 6 columns, one group variable remaining (cohort)
ungroup: no grouping variables
ggplot() +
  geom_ribbon(data = VBG, aes(cohort, k_median, ymin = k_lwr, ymax = k_upr,
                              fill = "All k's"), alpha = 0.4) +
  geom_ribbon(data = VBG_filter, aes(cohort, k_median, ymin = k_lwr, ymax = k_upr,
                                     fill = "Filtered"), alpha = 0.4) +
  geom_line(data = VBG, aes(cohort, k_median, color = "All k's")) + 
  geom_line(data = VBG_filter, aes(cohort, k_median, color = "Filtered"), linetype = 2) + 
  guides(fill = "none") +
  facet_wrap(~area)

ggplot() +
  geom_line(data = VBG, aes(cohort, k_median, color = "All k's")) + 
  geom_line(data = VBG_filter, aes(cohort, k_median, color = "Filtered"), linetype = 2) + 
  guides(fill = "none") +
  facet_wrap(~area)

# No difference at all really. We should probably just discuss that with this model, achieving biologically reasonable parameter values and a good fit to data are sometimes two different things. In our case, we just want a representative value of the growth (as in time to reach average maximum size in the population) that accounts for the entire growth trajectory of an individual, for each area and cohort.

Calculate sample size

sample_size <- IVBG %>%
  group_by(area) %>% 
  summarise(n_cohort = length(unique(cohort)),
            min_cohort = min(cohort),
            max_cohort = max(cohort),
            n_individuals = length(unique(ID)),
            n_data_points = n())
group_by: one grouping variable (area)
summarise: now 10 rows and 6 columns, ungrouped
sample_size
# A tibble: 10 × 6
   area  n_cohort min_cohort max_cohort n_individuals n_data_points
   <chr>    <int>      <dbl>      <dbl>         <int>         <int>
 1 BS          17       1985       2001          1334          7688
 2 BT          22       1972       1995           961          5371
 3 FB          47       1969       2015          6040         37381
 4 FM          37       1963       1999          5057         32642
 5 HO          29       1982       2015          1150          6210
 6 JM          60       1953       2014          4868         28749
 7 MU          18       1981       1999          1104          6666
 8 RA          14       1985       2003           572          3122
 9 SI_EK       24       1968       2011           658          3571
10 SI_HA       38       1967       2013          1861         10623
sample_size %>%
  ungroup() %>%
  summarise(sum_ind = sum(n_individuals), sum_n = sum(n_data_points))
ungroup: no grouping variables
summarise: now one row and 2 columns, ungrouped
# A tibble: 1 × 2
  sum_ind  sum_n
    <int>  <int>
1   23605 142023
write_csv(sample_size, paste0(home, "/output/sample_size.csv"))

Add GAM-predicted temperature to growth models

pred_temp <- read_csv(paste0(home, "/output/gam_predicted_temps.csv")) %>% 
  rename(cohort = year)
Rows: 380 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): area, model
dbl (2): year, temp

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
rename: renamed one variable (cohort)
VBG <- VBG %>%
  left_join(pred_temp, by = c("area", "cohort"))
left_join: added 2 columns (temp, model)
           > rows only in x     0
           > rows only in y  ( 74)
           > matched rows     306
           >                 =====
           > rows total       306
# Save data for map-plot
cohort_sample_size <- IVBG %>%
  group_by(area, cohort) %>% 
  summarise(n = n()) # individuals, not samples!
group_by: 2 grouping variables (area, cohort)
summarise: now 306 rows and 3 columns, one group variable remaining (area)
VBG <- left_join(VBG, cohort_sample_size, by = c("cohort", "area"))
left_join: added one column (n)
           > rows only in x     0
           > rows only in y  (  0)
           > matched rows     306
           >                 =====
           > rows total       306
write_csv(VBG, paste0(home, "/output/vbg.csv"))

# Calculate the plotting order (also used for map plot)
order <- VBG %>%
  ungroup() %>%
  group_by(area) %>%
  summarise(min_temp = min(temp)) %>%
  arrange(desc(min_temp))
ungroup: no grouping variables
group_by: one grouping variable (area)
summarise: now 10 rows and 2 columns, ungrouped
order
# A tibble: 10 × 2
   area  min_temp
   <chr>    <dbl>
 1 SI_HA     7.83
 2 BS        6.22
 3 BT        5.87
 4 FM        5.87
 5 SI_EK     5.49
 6 MU        5.16
 7 FB        5.04
 8 HO        3.99
 9 JM        3.37
10 RA        3.06
write_csv(order, paste0(home, "/output/ranked_temps.csv"))

nareas <- length(unique(order$area)) + 2 # to skip the brightest colors that are hard to read
colors <- colorRampPalette(brewer.pal(name = "RdYlBu", n = 10))(nareas)[-c(6,7)]

Plot VBGE fits

# Sample 30 IDs per area and plot their data and VBGE fits
set.seed(4)
ids <- IVBG %>% distinct(ID, .keep_all = TRUE) %>% group_by(area) %>% slice_sample(n = 30)
distinct: removed 118,418 rows (83%), 23,605 rows remaining
group_by: one grouping variable (area)
slice_sample (grouped): removed 23,305 rows (99%), 300 rows remaining
IVBG2 <- IVBG %>%
  filter(ID %in% ids$ID) %>% 
  distinct(ID, .keep_all = TRUE) %>% 
  select(ID, k, linf)
filter: removed 140,319 rows (99%), 1,704 rows remaining
distinct: removed 1,404 rows (82%), 300 rows remaining
select: dropped 10 variables (k_se, linf_se, k_lwr, k_upr, linf_lwr, …)
# Join fitted k and linf back onto the raw observations for the sampled IDs
d2 <- d %>%
  ungroup() %>%
  filter(ID %in% ids$ID) %>%
  left_join(IVBG2, by = "ID") %>%
  # Predicted length from the individual VBGF fit; note there is no t0 term,
  # i.e. the curve is forced through length 0 at age 0 — presumably matching
  # the parameterization in fit_nls(); verify against that function
  mutate(length_mm_pred = linf*(1-exp(-k*age_bc)))
ungroup: no grouping variables
filter: removed 286,712 rows (99%), 1,704 rows remaining
left_join: added 2 columns (k, linf)
           > rows only in x       0
           > rows only in y  (    0)
           > matched rows     1,704
           >                 =======
           > rows total       1,704
mutate: new variable 'length_mm_pred' (double) with 1,697 unique values and 0% NA
fits_ind <- ggplot(d2, aes(age_bc, length_mm, group = ID, color = ID)) +
  geom_jitter(width = 0.3, height = 0, alpha = 0.5, size = 0.4) +
  geom_line(aes(age_bc, length_mm_pred, group = ID, color = ID),
            inherit.aes = FALSE, alpha = 0.8, linewidth = 0.3) + 
  guides(color = "none") +
  scale_color_viridis(discrete = TRUE, name = "Site", option = "cividis") +
  labs(x = "Age (years)", y = "Length (mm)") +
  facet_wrap(~factor(area, levels = (arrange(area_attr, desc(lat)))$area), ncol = 5) + 
  NULL

# Violin + boxplot of individual k by site (flipped for horizontal layout)
k <- IVBG %>% 
  ggplot(aes(factor(area, order$area), k, fill = factor(area, order$area))) + 
  geom_violin(alpha = 0.8, color = NA) +  
  geom_boxplot(outlier.color = NA, width = 0.2, alpha = 0.9, fill = NA, size = 0.4) +
  scale_fill_manual(values = colors, name = "Site") +
  guides(fill = "none", color = "none") +
  labs(x = "Site", y = expression(italic(k))) + 
  # A plot has exactly one coordinate system: a later coord_flip() would
  # silently replace coord_cartesian() and drop its ylim, so the limit must
  # be passed to coord_flip() itself
  coord_flip(ylim = c(0, 0.7))
Coordinate system already present. Adding new coordinate system, which will
replace the existing one.
# Violin + boxplot of individual L-infinity by site, trimmed to a plausible range
linf <- IVBG %>% 
  filter(linf < 2300) %>% 
  filter(linf > 130) %>% 
  ggplot(aes(factor(area, order$area), linf, fill = factor(area, order$area))) + 
  geom_violin(alpha = 0.8, color = NA) +  
  geom_boxplot(outlier.color = NA, width = 0.1, alpha = 0.9, fill = NA, size = 0.4) +
  scale_fill_manual(values = colors, name = "Site") +
  guides(fill = "none", color = "none") +
  labs(x = "", y = expression(paste(italic(L[infinity]), " [mm]"))) +
  theme(axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank()) + 
  # coord_flip() replaces any earlier coord_cartesian(), discarding its ylim;
  # supply the limit here so it is actually applied
  coord_flip(ylim = c(0, 2000))
filter: removed 1,218 rows (1%), 140,805 rows remaining
filter: no rows removed
Coordinate system already present. Adding new coordinate system, which will
replace the existing one.
#fits_ind / (k + linf) + plot_annotation(tag_levels = "A") + plot_layout(heights = c(1, 1.8))

# Adding coord_cartesian() would replace the existing coord_flip() and
# un-flip the panel; override the limit via coord_flip() to keep orientation
k2 <- k + coord_flip(ylim = c(0, 0.65))
Coordinate system already present. Adding new coordinate system, which will
replace the existing one.
fits_ind / k2 + plot_annotation(tag_levels = "A") #+ plot_layout(heights = c(1, 1.8))

ggsave(paste0(home, "/figures/vb_pars_fits.pdf" ), width = 17, height = 18, units = "cm")

Fit Sharpe-Schoolfield model to k

dat <- VBG %>%
  select(k_median, temp, area) %>%
  rename(rate = k_median)
select: dropped 7 variables (cohort, k_mean, linf_median, k_lwr, k_upr, …)
rename: renamed one variable (rate)

Can we fit a single Sharpe-Schoolfield model with area-specific parameters with brms?

# Again, here are the data we are fitting:
ggplot(dat, aes(temp, rate, color = factor(area, order$area))) + 
  geom_point(data = dat, aes(temp, rate, color = factor(area, order$area)), size = 0.6)  

hist(dat$rate)

# Here's the equation (from the r package rTPC):
# > sharpeschoolhigh_1981
# function (temp, r_tref, e, eh, th, tref) 
# {
#     tref <- 273.15 + tref
#     k <- 8.62e-05
#     boltzmann.term <- r_tref * exp(e/k * (1/tref - 1/(temp + 
#         273.15)))
#     inactivation.term <- 1/(1 + exp(eh/k * (1/(th + 273.15) - 
#         1/(temp + 273.15))))
#     return(boltzmann.term * inactivation.term)
# }

# Add in fixed parameters
dat$bk <- 8.62e-05
dat$tref <- 8 + 273.15

# (better visualization including bounds further down)
# Draw from each prior to eyeball its spread; means/sds mirror the prior()
# definitions below (the r_tref and th draws previously used 0.25 and 12,
# which did not match the 0.3 and 10 actually used in the model)
n <- 10000
hist(rnorm(n, mean = 0.8, sd = 1), main = "e", xlim = c(0, 5))

hist(rnorm(n, mean = 2, sd = 1), main = "eh", xlim = c(0, 7))

hist(rnorm(n, mean = 0.3, sd = 0.5), main = "r_tref", xlim = c(0, 2.5))

hist(rnorm(n, mean = 10, sd = 5), main = "th", xlim = c(5, 30))

# These work nicely with the pp_check
# Weakly informative priors on the four Sharpe-Schoolfield parameters; all
# are truncated at zero (lb = 0) since the rate, both activation energies,
# and the inactivation temperature should be positive
prior <- c(prior(normal(0.8, 1), nlpar = "e", lb = 0),
           prior(normal(2, 1), nlpar = "eh", lb = 0),
           prior(normal(0.3, 0.5), nlpar = "rtref", lb = 0),
           prior(normal(10, 5), nlpar = "th", lb = 0))

# Now to a prior predictive check
# sample_prior = "only" makes brms ignore the likelihood and sample the
# parameters from the priors alone, so the predictions below show what the
# priors imply before seeing data
fit_prior <- brm(
  bf(rate ~ (rtref * exp(e/bk * (1/tref - 1/(temp + 273.15)))) / (1 + exp(eh/bk * (1/(th + 273.15) - 1/(temp + 273.15)))),
     rtref + e + eh + th ~ 1, 
     nl = TRUE),
  data = dat,
  cores = 2,
  chains = 2,
  iter = 1500,
  sample_prior = "only",
  seed = 9,
  prior = prior
)
Compiling Stan program...
Start sampling
# Global prior predictive check in relation to data. Doesn't look too informative...
dat %>%
  data_grid(temp = seq_range(temp, n = 51)) %>% 
  ungroup() %>% 
  mutate(bk = 8.62e-05,
         tref = 8 + 273.15) %>% 
  add_epred_draws(fit_prior) %>% 
  ggplot(aes(temp, y = .epred)) +
  stat_lineribbon(.width = c(0.75), alpha = 0.3, color = "black", fill = "black") + 
  geom_point(data = dat, aes(temp, rate, color = factor(area, order$area)), size = 0.6) +
  scale_color_manual(values = colors, name = "Site") +
  labs(y = "Expectation of the posterior predictive distribution", x = "Temperature (°C)") +
  NULL
ungroup: no grouping variables
mutate: new variable 'bk' (double) with one unique value and 0% NA
        new variable 'tref' (double) with one unique value and 0% NA

ggsave(paste0(home, "/figures/supp/prior_predictive_check.pdf" ), width = 15, height = 11, units = "cm")

# Now make sure it converges with real data but no random effects
# Global (population-level only) Sharpe-Schoolfield fit with a Student-t
# likelihood; sample_prior = "yes" additionally stores draws from the priors
# in the same object, which the prior-vs-posterior plots below rely on
fit_global <- brm(
  bf(rate ~ (rtref * exp(e/bk * (1/tref - 1/(temp + 273.15)))) / (1 + exp(eh/bk * (1/(th + 273.15) - 1/(temp + 273.15)))),
     rtref + e + eh + th ~ 1,
     nl = TRUE),
  data = dat,
  cores = 4,
  chains = 4,
  iter = 4000, 
  seed = 9,
  sample_prior = "yes",
  family = student,
  prior = prior
  )
Compiling Stan program...
Start sampling
plot(fit_global)

pp_check(fit_global)
Using 10 posterior draws for ppc type 'dens_overlay' by default.

plot(conditional_effects(fit_global, effect = "temp"), points = TRUE)

# Plot prior vs posterior
post <- fit_global %>%
  as_draws_df() %>%
  dplyr::select(b_rtref_Intercept, b_e_Intercept, b_eh_Intercept, b_th_Intercept) %>% 
  rename(r_tref = b_rtref_Intercept, 
         e = b_e_Intercept,
         eh = b_eh_Intercept,
         th = b_th_Intercept) %>% 
  pivot_longer(everything(), names_to = "parameter") %>% 
  mutate(type = "Posterior")
Warning: Dropping 'draws_df' class as required metadata was removed.
rename: renamed 4 variables (r_tref, e, eh, th)
pivot_longer: reorganized (r_tref, e, eh, th) into (parameter, value) [was 8000x4, now 32000x2]
mutate: new variable 'type' (character) with one unique value and 0% NA
prior_draws <- fit_global %>%
  as_draws_df() %>%
  dplyr::select(prior_b_rtref, prior_b_e, prior_b_eh, prior_b_th) %>% 
  rename(r_tref = prior_b_rtref, 
         e = prior_b_e,
         eh = prior_b_eh,
         th = prior_b_th) %>% 
  pivot_longer(everything(), names_to = "parameter") %>% 
  mutate(type = "Prior")
Warning: Dropping 'draws_df' class as required metadata was removed.
rename: renamed 4 variables (r_tref, e, eh, th)
pivot_longer: reorganized (r_tref, e, eh, th) into (parameter, value) [was 8000x4, now 32000x2]
mutate: new variable 'type' (character) with one unique value and 0% NA
dist <- bind_rows(prior_draws, post)

ggplot(dist, aes(value, fill = type)) +
  geom_density(color = NA, alpha = 0.5) +
  scale_fill_brewer(palette = "Set1") +
  facet_wrap(~parameter, scales = "free") +
  theme(legend.position = c(0.35, 0.9)) +
  labs(fill = "")

Ok, seems to work with priors and everything. They are roughly as broad as can be and still have a fit. Now fit the full model, with random area effects, more iterations and chains!

# Now look at random area effects!
nrow(dat)
[1] 306
# Student-t model with random effects
# Full model: all four non-linear parameters get area-level random intercepts
# (1|area). The Student-t family has heavier tails than a Gaussian, making the
# fit less sensitive to outlying cohort medians. adapt_delta and max_treedepth
# are raised above their defaults to help the sampler with this non-linear,
# hierarchical posterior.
fitbs <- brm(
  bf(rate ~ (rtref * exp(e/bk * (1/tref - 1/(temp + 273.15)))) / (1 + exp(eh/bk * (1/(th + 273.15) - 1/(temp + 273.15)))),
     rtref + e + eh + th ~ 1 + (1|area),
     nl = TRUE),
  data = dat,
  family = student,
  cores = 4,
  chains = 4,
  iter = 5000,
  sample_prior = "yes",
  save_pars = save_pars(all = TRUE),
  seed = 9,
  prior = prior,
  control = list(adapt_delta = 0.99, max_treedepth = 12)
)
Compiling Stan program...
Start sampling
print(fitbs, digits = 4)
 Family: student 
  Links: mu = identity; sigma = identity; nu = identity 
Formula: rate ~ (rtref * exp(e/bk * (1/tref - 1/(temp + 273.15))))/(1 + exp(eh/bk * (1/(th + 273.15) - 1/(temp + 273.15)))) 
         rtref ~ 1 + (1 | area)
         e ~ 1 + (1 | area)
         eh ~ 1 + (1 | area)
         th ~ 1 + (1 | area)
   Data: dat (Number of observations: 306) 
  Draws: 4 chains, each with iter = 5000; warmup = 2500; thin = 1;
         total post-warmup draws = 10000

Group-Level Effects: 
~area (Number of levels: 10) 
                    Estimate Est.Error l-95% CI u-95% CI   Rhat Bulk_ESS
sd(rtref_Intercept)   0.0448    0.0359   0.0025   0.1362 1.0017     2475
sd(e_Intercept)       0.1804    0.1253   0.0090   0.4791 1.0015     3656
sd(eh_Intercept)      0.2939    0.2872   0.0085   1.0210 1.0021     2123
sd(th_Intercept)      0.8778    0.7823   0.0360   2.9035 1.0030     3243
                    Tail_ESS
sd(rtref_Intercept)     3885
sd(e_Intercept)         4146
sd(eh_Intercept)        4716
sd(th_Intercept)        4761

Population-Level Effects: 
                Estimate Est.Error l-95% CI u-95% CI   Rhat Bulk_ESS Tail_ESS
rtref_Intercept   0.4282    0.1992   0.2162   0.9758 1.0014     2067     3988
e_Intercept       0.8406    0.4492   0.1791   1.8838 1.0024     1977     3497
eh_Intercept      1.5944    0.4782   0.5949   2.5274 1.0008     3774     2928
th_Intercept      9.1701    3.7813   3.1253  17.1805 1.0021     1942     4389

Family Specific Parameters: 
      Estimate Est.Error l-95% CI u-95% CI   Rhat Bulk_ESS Tail_ESS
sigma   0.0397    0.0020   0.0357   0.0437 1.0006    11173     7099
nu     24.6701   13.2105   8.0216  58.5110 0.9999    11296     7653

Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).

Inspect the model

# Check fit

library(performance)

Attaching package: 'performance'
The following objects are masked from 'package:modelr':

    mae, mse, rmse
fitbs
 Family: student 
  Links: mu = identity; sigma = identity; nu = identity 
Formula: rate ~ (rtref * exp(e/bk * (1/tref - 1/(temp + 273.15))))/(1 + exp(eh/bk * (1/(th + 273.15) - 1/(temp + 273.15)))) 
         rtref ~ 1 + (1 | area)
         e ~ 1 + (1 | area)
         eh ~ 1 + (1 | area)
         th ~ 1 + (1 | area)
   Data: dat (Number of observations: 306) 
  Draws: 4 chains, each with iter = 5000; warmup = 2500; thin = 1;
         total post-warmup draws = 10000

Group-Level Effects: 
~area (Number of levels: 10) 
                    Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(rtref_Intercept)     0.04      0.04     0.00     0.14 1.00     2475     3885
sd(e_Intercept)         0.18      0.13     0.01     0.48 1.00     3656     4146
sd(eh_Intercept)        0.29      0.29     0.01     1.02 1.00     2123     4716
sd(th_Intercept)        0.88      0.78     0.04     2.90 1.00     3243     4761

Population-Level Effects: 
                Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
rtref_Intercept     0.43      0.20     0.22     0.98 1.00     2067     3988
e_Intercept         0.84      0.45     0.18     1.88 1.00     1977     3497
eh_Intercept        1.59      0.48     0.59     2.53 1.00     3774     2928
th_Intercept        9.17      3.78     3.13    17.18 1.00     1942     4389

Family Specific Parameters: 
      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma     0.04      0.00     0.04     0.04 1.00    11173     7099
nu       24.67     13.21     8.02    58.51 1.00    11296     7653

Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
r2_bayes(fitbs)
# Bayesian R2 with Compatibility Interval

  Conditional R2: 0.374 (95% CI [0.301, 0.441])
r2_bayes(fit_global)
# Bayesian R2 with Compatibility Interval

  Conditional R2: 0.119 (95% CI [0.056, 0.185])
# qq-plots
p1 <- dat %>%
  add_predicted_draws(fitbs) %>%
  summarise(
    p_residual = mean(.prediction < rate),
    z_residual = qnorm(p_residual),
    .groups = "drop_last"
  ) %>%
  ggplot(aes(sample = z_residual)) +
  geom_qq() +
  geom_abline() + 
  labs(x = "Theoretical", y = "Sample") + 
  ggtitle("Mixed model") +
  theme(aspect.ratio = 1)
summarise: now 306 rows and 8 columns, 5 group variables remaining (rate, temp, area, bk, tref)
p2 <- dat %>%
  add_predicted_draws(fit_global) %>%
  summarise(
    p_residual = mean(.prediction < rate),
    z_residual = qnorm(p_residual),
    .groups = "drop_last"
  ) %>%
  ggplot(aes(sample = z_residual)) +
  geom_qq() +
  geom_abline() + 
  labs(x = "Theoretical", y = "Sample") + 
  ggtitle("Global model") +
  theme(aspect.ratio = 1)
summarise: now 306 rows and 8 columns, 5 group variables remaining (rate, temp, area, bk, tref)
p3 <- pp_check(fitbs) + theme(legend.position = c(0.1, 0.9)) + ggtitle("Mixed model")
Using 10 posterior draws for ppc type 'dens_overlay' by default.
p4 <- pp_check(fit_global) + theme(legend.position = c(0.1, 0.9)) + ggtitle("Global model")
Using 10 posterior draws for ppc type 'dens_overlay' by default.
(p1 + p2) / (p3 + p4)

ggsave(paste0(home, "/figures/supp/sharpe_brms_ppcheck_qq.pdf" ), width = 17, height = 17, units = "cm")

# Plot prior vs posterior
post <- fitbs %>%
  as_draws_df() %>%
  dplyr::select(b_rtref_Intercept, b_e_Intercept, b_eh_Intercept, b_th_Intercept) %>% 
  rename(r_tref = b_rtref_Intercept, 
         e = b_e_Intercept,
         eh = b_eh_Intercept,
         th = b_th_Intercept) %>% 
  pivot_longer(everything(), names_to = "parameter") %>% 
  mutate(type = "Posterior")
Warning: Dropping 'draws_df' class as required metadata was removed.
rename: renamed 4 variables (r_tref, e, eh, th)
pivot_longer: reorganized (r_tref, e, eh, th) into (parameter, value) [was 10000x4, now 40000x2]
mutate: new variable 'type' (character) with one unique value and 0% NA
prior_draws <- fitbs %>%
  as_draws_df() %>%
  dplyr::select(prior_b_rtref, prior_b_e, prior_b_eh, prior_b_th) %>% 
    rename(r_tref = prior_b_rtref, 
         e = prior_b_e,
         eh = prior_b_eh,
         th = prior_b_th) %>% 
  pivot_longer(everything(), names_to = "parameter") %>% 
  mutate(type = "Prior")
Warning: Dropping 'draws_df' class as required metadata was removed.
rename: renamed 4 variables (r_tref, e, eh, th)
pivot_longer: reorganized (r_tref, e, eh, th) into (parameter, value) [was 10000x4, now 40000x2]
mutate: new variable 'type' (character) with one unique value and 0% NA
dist <- bind_rows(prior_draws, post)

ggplot(dist, aes(value, fill = type)) +
  geom_density(color = NA, alpha = 0.5) +
  scale_fill_brewer(palette = "Set1") +
  facet_wrap(~parameter, scales = "free") + 
  theme(legend.position = c(0.35, 0.9)) +
  labs(fill = "", x = "Value", y = "Density")

ggsave(paste0(home, "/figures/supp/sharpe_brms_prior.pdf" ), width = 17, height = 17, units = "cm")

Sensitivity with respect to priors

# Set a bunch of priors with different sigmas
# Baseline sds (the original analysis prior, kept for reference):
  # prior1 <- c(prior(normal(0.8, 1), nlpar = "e", lb = 0),
  #             prior(normal(2, 1), nlpar = "eh", lb = 0),
  #             prior(normal(0.3, 0.5), nlpar = "rtref", lb = 0),
  #             prior(normal(10, 5), nlpar = "th", lb = 0))

# For each set, all sds are the baseline sds multiplied by 1.5^k
# (k = 1, ..., 5). Built programmatically with prior_string() because
# prior() quotes its first argument and cannot interpolate computed sds.
make_prior_set <- function(k) {
  s <- 1.5^k
  c(prior_string(paste0("normal(0.8, ", 1 * s, ")"), nlpar = "e", lb = 0),
    prior_string(paste0("normal(2, ", 1 * s, ")"), nlpar = "eh", lb = 0),
    prior_string(paste0("normal(0.3, ", 0.5 * s, ")"), nlpar = "rtref", lb = 0),
    prior_string(paste0("normal(10, ", 5 * s, ")"), nlpar = "th", lb = 0))
}

# priors[[1]] is the old prior2 (sd x 1.5), ..., priors[[5]] the old prior6
priors <- lapply(seq_len(5), make_prior_set)

# Helper: extract four named draw columns from a brms fit, tidy them into
# long (parameter, value) format, and tag with a draw-type label.
# `cols` is a named character vector: names(cols) become the parameter
# names (all_of() renames while selecting).
tidy_param_draws <- function(fit, cols, label) {
  fit %>%
    as_draws_df() %>%
    dplyr::select(all_of(cols)) %>%
    pivot_longer(everything(), names_to = "parameter") %>%
    mutate(type = label)
}

# List for storing posteriors and priors (preallocated, one slot per prior set)
dist_list <- vector("list", length(priors))

for (i in seq_along(priors)) {

  prior <- priors[[i]]

  # Refit the Sharpe-Schoolfield model under the i-th prior set;
  # sample_prior = "yes" stores prior draws alongside the posterior
  fit_sens <- brm(
    bf(rate ~ (rtref * exp(e/bk * (1/tref - 1/(temp + 273.15)))) / (1 + exp(eh/bk * (1/(th + 273.15) - 1/(temp + 273.15)))),
       rtref + e + eh + th ~ 1,
       nl = TRUE),
    data = dat,
    cores = 4,
    chains = 4,
    iter = 4000, 
    seed = 9,
    sample_prior = "yes",
    family = student,
    prior = prior
    )

  # Posterior draws of the four non-linear parameters
  post <- tidy_param_draws(
    fit_sens,
    c(r_tref = "b_rtref_Intercept", e = "b_e_Intercept",
      eh = "b_eh_Intercept", th = "b_th_Intercept"),
    "Posterior")

  # Matching prior draws
  prior_draws <- tidy_param_draws(
    fit_sens,
    c(r_tref = "prior_b_rtref", e = "prior_b_e",
      eh = "prior_b_eh", th = "prior_b_th"),
    "Prior")

  # iter = i indexes the prior set (1 = narrowest, 5 = widest)
  dist_list[[i]] <- bind_rows(prior_draws, post) %>% 
    mutate(iter = i)

}
Compiling Stan program...
Start sampling
Warning: Dropping 'draws_df' class as required metadata was removed.
rename: renamed 4 variables (r_tref, e, eh, th)
pivot_longer: reorganized (r_tref, e, eh, th) into (parameter, value) [was 8000x4, now 32000x2]
mutate: new variable 'type' (character) with one unique value and 0% NA
Warning: Dropping 'draws_df' class as required metadata was removed.
rename: renamed 4 variables (r_tref, e, eh, th)
pivot_longer: reorganized (r_tref, e, eh, th) into (parameter, value) [was 8000x4, now 32000x2]
mutate: new variable 'type' (character) with one unique value and 0% NA
mutate: new variable 'iter' (integer) with one unique value and 0% NA
Compiling Stan program...
Start sampling
Warning: Dropping 'draws_df' class as required metadata was removed.
rename: renamed 4 variables (r_tref, e, eh, th)
pivot_longer: reorganized (r_tref, e, eh, th) into (parameter, value) [was 8000x4, now 32000x2]
mutate: new variable 'type' (character) with one unique value and 0% NA
Warning: Dropping 'draws_df' class as required metadata was removed.
rename: renamed 4 variables (r_tref, e, eh, th)
pivot_longer: reorganized (r_tref, e, eh, th) into (parameter, value) [was 8000x4, now 32000x2]
mutate: new variable 'type' (character) with one unique value and 0% NA
mutate: new variable 'iter' (integer) with one unique value and 0% NA
Compiling Stan program...
Start sampling
Warning: There were 2 divergent transitions after warmup. See
https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
to find out why this is a problem and how to eliminate them.
Warning: Examine the pairs() plot to diagnose sampling problems
Warning: Dropping 'draws_df' class as required metadata was removed.
rename: renamed 4 variables (r_tref, e, eh, th)
pivot_longer: reorganized (r_tref, e, eh, th) into (parameter, value) [was 8000x4, now 32000x2]
mutate: new variable 'type' (character) with one unique value and 0% NA
Warning: Dropping 'draws_df' class as required metadata was removed.
rename: renamed 4 variables (r_tref, e, eh, th)
pivot_longer: reorganized (r_tref, e, eh, th) into (parameter, value) [was 8000x4, now 32000x2]
mutate: new variable 'type' (character) with one unique value and 0% NA
mutate: new variable 'iter' (integer) with one unique value and 0% NA
Compiling Stan program...
Start sampling
Warning: There were 16 divergent transitions after warmup. See
https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
to find out why this is a problem and how to eliminate them.
Warning: Examine the pairs() plot to diagnose sampling problems
Warning: Dropping 'draws_df' class as required metadata was removed.
rename: renamed 4 variables (r_tref, e, eh, th)
pivot_longer: reorganized (r_tref, e, eh, th) into (parameter, value) [was 8000x4, now 32000x2]
mutate: new variable 'type' (character) with one unique value and 0% NA
Warning: Dropping 'draws_df' class as required metadata was removed.
rename: renamed 4 variables (r_tref, e, eh, th)
pivot_longer: reorganized (r_tref, e, eh, th) into (parameter, value) [was 8000x4, now 32000x2]
mutate: new variable 'type' (character) with one unique value and 0% NA
mutate: new variable 'iter' (integer) with one unique value and 0% NA
Compiling Stan program...
Start sampling
Warning: Dropping 'draws_df' class as required metadata was removed.
rename: renamed 4 variables (r_tref, e, eh, th)
pivot_longer: reorganized (r_tref, e, eh, th) into (parameter, value) [was 8000x4, now 32000x2]
mutate: new variable 'type' (character) with one unique value and 0% NA
Warning: Dropping 'draws_df' class as required metadata was removed.
rename: renamed 4 variables (r_tref, e, eh, th)
pivot_longer: reorganized (r_tref, e, eh, th) into (parameter, value) [was 8000x4, now 32000x2]
mutate: new variable 'type' (character) with one unique value and 0% NA
mutate: new variable 'iter' (integer) with one unique value and 0% NA
# Combine all prior-sensitivity runs and save for downstream use
dist_df <- bind_rows(dist_list)

write_csv(dist_df, paste0(home, "/output/prior_sens.csv"))

# One panel per (prior-sd level x parameter); independent free y-axes
ggplot(dist_df, aes(x = value, fill = type)) +
  geom_density(alpha = 0.5, color = NA) +
  ggh4x::facet_grid2(iter~parameter, scales = "free", independent = "y") +
  scale_fill_brewer(palette = "Set1") +
  labs(x = "Value", y = "Density", fill = "")

ggsave(paste0(home, "/figures/supp/prior_sensi.pdf"), width = 17, height = 17, units = "cm")

# Density of r_tref draws by prior-sd level (colour) and draw type (linetype)
dist_df %>%
  filter(parameter == "r_tref") %>%
  ggplot(aes(value, color = as.factor(iter), linetype = as.factor(type))) +
  geom_density(fill = NA) +
  scale_color_viridis(discrete = TRUE) +
  facet_wrap(~parameter, scales = "free") +
  coord_cartesian(xlim = c(0, 4)) +
  labs(fill = "", linetype = "", color = "iteration")
filter: removed 240,000 rows (75%), 80,000 rows remaining

# Density of e draws by prior-sd level (colour) and draw type (linetype)
dist_df %>%
  filter(parameter == "e") %>%
  ggplot(aes(value, color = as.factor(iter), linetype = as.factor(type))) +
  geom_density(fill = NA) +
  scale_color_viridis(discrete = TRUE) +
  facet_wrap(~parameter, scales = "free") +
  coord_cartesian(xlim = c(0, 12)) +
  labs(fill = "", linetype = "", color = "iteration")
filter: removed 240,000 rows (75%), 80,000 rows remaining

# Density of eh draws by prior-sd level (colour) and draw type (linetype)
dist_df %>%
  filter(parameter == "eh") %>%
  ggplot(aes(value, color = as.factor(iter), linetype = as.factor(type))) +
  geom_density(fill = NA) +
  scale_color_viridis(discrete = TRUE) +
  facet_wrap(~parameter, scales = "free") +
  coord_cartesian(xlim = c(0, 10)) +
  labs(fill = "", linetype = "", color = "iteration")
filter: removed 240,000 rows (75%), 80,000 rows remaining

# Density of th draws by prior-sd level (colour) and draw type (linetype)
dist_df %>%
  filter(parameter == "th") %>%
  ggplot(aes(value, color = as.factor(iter), linetype = as.factor(type))) +
  geom_density(fill = NA) +
  scale_color_viridis(discrete = TRUE) +
  facet_wrap(~parameter, scales = "free") +
  coord_cartesian(xlim = c(0, 35)) +
  labs(fill = "", linetype = "", color = "iteration")
filter: removed 240,000 rows (75%), 80,000 rows remaining

# Prior vs posterior mean +/- 1 sd per parameter and prior-sd level.
# Fix: points/errorbars are mapped to colour, not fill, so the brewer
# palette must be applied via scale_color_brewer (the original
# scale_fill_brewer had no fill aesthetic to act on and was inert).
dist_df %>% 
  group_by(iter, parameter, type) %>% 
  summarise(mean = mean(value),
            sd = sd(value)) %>% 
  ggplot(aes(iter, mean, color = type)) +
  geom_point(position = position_dodge(width = 0.2)) +
  geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd), width = 0,
                position = position_dodge(width = 0.2)) +
  scale_color_brewer(palette = "Set1") +
  facet_wrap(~parameter, scales = "free")
group_by: 3 grouping variables (iter, parameter, type)
summarise: now 40 rows and 5 columns, 2 group variables remaining (iter, parameter)

# Prior vs posterior means only (no spread), per parameter and prior-sd level.
# Fix: colour aesthetic is mapped, so use scale_color_brewer (the original
# scale_fill_brewer was inert — no fill aesthetic in this plot).
dist_df %>% 
  group_by(iter, parameter, type) %>% 
  summarise(mean = mean(value)) %>% 
  ggplot(aes(iter, mean, color = type)) +
  geom_point(position = position_dodge(width = 0.2)) +
  scale_color_brewer(palette = "Set1") +
  facet_wrap(~parameter, scales = "free")
group_by: 3 grouping variables (iter, parameter, type)
summarise: now 40 rows and 4 columns, 2 group variables remaining (iter, parameter)

# Numeric summary table: mean and sd of the draws per prior-sd level (iter),
# parameter, and draw type (Prior/Posterior), printed parameter-by-parameter
dist_df %>% 
  group_by(iter, parameter, type) %>% 
  summarise(mean = mean(value),
            sd = sd(value)) %>% 
  arrange(parameter) %>% 
  as.data.frame()
group_by: 3 grouping variables (iter, parameter, type)
summarise: now 40 rows and 5 columns, 2 group variables remaining (iter, parameter)
   iter parameter      type       mean         sd
1     1         e Posterior  1.5666273  0.5433500
2     1         e     Prior  1.5400261  1.0644415
3     2         e Posterior  1.7985632  0.6605803
4     2         e     Prior  2.7386087  1.9614945
5     3         e Posterior  1.9682942  0.7303356
6     3         e     Prior  3.8873451  2.8627769
7     4         e Posterior  2.0599463  0.8300683
8     4         e     Prior  5.7127923  4.1759926
9     5         e Posterior  2.0758570  0.8143482
10    5         e     Prior  8.3195165  6.2412788
11    1        eh Posterior  2.2812624  0.3784660
12    1        eh     Prior  2.2622283  1.2852999
13    2        eh Posterior  2.4482207  0.4904633
14    2        eh     Prior  3.2391851  2.1680067
15    3        eh Posterior  2.5684470  0.5647276
16    3        eh     Prior  4.4407647  3.1300813
17    4        eh Posterior  2.6476977  0.6623166
18    4        eh     Prior  6.2314744  4.5150719
19    5        eh Posterior  2.6499346  0.6467294
20    5        eh     Prior  8.8776777  6.4459986
21    1    r_tref Posterior  0.5673703  0.2452419
22    1    r_tref     Prior  0.7213955  0.5110758
23    2    r_tref Posterior  0.6845378  0.3484815
24    2    r_tref     Prior  1.0063458  0.7296217
25    3    r_tref Posterior  0.7865568  0.4339258
26    3    r_tref     Prior  1.4566510  1.0666056
27    4    r_tref Posterior  0.8618913  0.5693465
28    4    r_tref     Prior  2.1003932  1.5515327
29    5    r_tref Posterior  0.8867666  0.6248196
30    5    r_tref     Prior  3.1245936  2.3342626
31    1        th Posterior  7.2228294  2.1934366
32    1        th     Prior 11.4702358  6.4450215
33    2        th Posterior  6.6603216  2.1071689
34    2        th     Prior 13.6986706  8.5624844
35    3        th Posterior  6.2672586  1.9774241
36    3        th     Prior 18.0709757 11.9736635
37    4        th Posterior  6.1780863  2.0086133
38    4        th     Prior 24.1862371 17.2734013
39    5        th Posterior  6.0768756  1.9768899
40    5        th     Prior 34.6299826 25.0058037

Calculate T_opt

# Population-level predictions over the observed temperature range (100-point
# grid). bk = 8.62e-05 is the Boltzmann constant in eV K^-1 and tref the
# reference temperature (8 degrees C in Kelvin), both used inside the
# Sharpe-Schoolfield formula fitted above. re_formula = NA drops random effects.
ts <- dat %>% 
  data_grid(temp = seq_range(temp, n = 100)) %>% 
  mutate(bk = 8.62e-05,
         tref = 8 + 273.15) %>% 
  add_epred_draws(fit_global, re_formula = NA, ndraws = 8000) 
mutate: new variable 'bk' (double) with one unique value and 0% NA
        new variable 'tref' (double) with one unique value and 0% NA
# Spaghetti plot of per-draw prediction curves.
# NOTE(review): p1 and p2 are built from identical code, so `p1 + p2` shows
# the same panel twice — confirm whether p2 was meant to use different data
# or limits.
p1 <- ggplot(ts) + 
  geom_line(aes(temp, y = .epred, group = .draw), alpha = .1) + 
  coord_cartesian(ylim = c(0.1, 0.4))

p2 <- ggplot(ts) + 
  geom_line(aes(temp, y = .epred, group = .draw), alpha = .1) +
  coord_cartesian(ylim = c(0.1, 0.4))

p1 + p2

# Optimum temperature per posterior draw: keep the grid temperature at which
# the predicted rate peaks, then drop draws whose peak sits on a grid
# endpoint (those curves have no interior optimum)
t_opt <- ts %>% 
  group_by(.draw) %>% 
  filter(.epred == max(.epred)) %>% 
  ungroup() %>% 
  filter(temp != min(temp)) %>% 
  filter(temp != max(temp))
group_by: one grouping variable (.draw)
filter (grouped): removed 792,000 rows (99%), 8,000 rows remaining
ungroup: no grouping variables
filter: removed 2 rows (<1%), 7,998 rows remaining
filter: removed 2 rows (<1%), 7,996 rows remaining
# These are used in the fit-temp script
# Median and 95% interval of the global optimum temperature
quantile(t_opt$temp, probs = c(0.025, 0.5, 0.975))
     2.5%       50%     97.5% 
 8.632517  9.468607 10.304697 
# Bare histogram of optimum-temperature draws, one bin per grid temperature
ggplot(t_opt, aes(x = temp)) +
  theme_void() +
  geom_histogram(bins = length(unique(t_opt$temp)))

# Make the main plot (conditional effect of temperature, with and without random effects)
# Predictions without random effects (re_formula = NA -> population level only);
# bk/tref are the Boltzmann constant (eV K^-1) and reference temperature (8 C
# in Kelvin) required by the fitted Sharpe-Schoolfield formula
glob <- dat %>%
  data_grid(temp = seq_range(temp, n = 100)) %>% 
  mutate(bk = 8.62e-05,
         tref = 8 + 273.15) %>% 
  add_epred_draws(fit_global, re_formula = NA) %>% #
  ungroup()
mutate: new variable 'bk' (double) with one unique value and 0% NA
        new variable 'tref' (double) with one unique value and 0% NA
ungroup: no grouping variables
# Area-specific predictions (fitbs) overlaid on the global fit (glob)
dat %>%
  group_by(area) %>%
  data_grid(temp = seq_range(temp, n = 100)) %>% 
  ungroup() %>% 
  mutate(bk = 8.62e-05,
         tref = 8 + 273.15) %>% 
  add_epred_draws(fitbs) %>% 
  ungroup() %>% 
  ggplot(aes(temp, y = .epred, color = factor(area, order$area))) +
  # Global 90% credible band (faint black)
  stat_lineribbon(data = glob, aes(temp, .epred), .width = c(0.9), inherit.aes = FALSE,
                 fill = "black", color = "black", alpha = 0.1) +
  # Raw observations, coloured by site
  geom_point(data = dat, aes(temp, rate, color = factor(area, order$area)), size = 1, alpha = 0.8) +
  # Area-specific lines (.width = 0 -> zero-width ribbon, i.e. line only)
  stat_lineribbon(.width = c(0)) +
  # Global line (dashed black), drawn on top
  stat_lineribbon(data = glob, aes(temp, .epred), .width = c(0), inherit.aes = FALSE,
                 color = "black", alpha = 0.9, linetype = 2) +
  coord_cartesian(expand = 0) +
  scale_color_manual(values = colors, name = "Site") +
  guides(fill = "none",
         color = guide_legend(nrow = 1, reverse = TRUE, title.position = "top", title.hjust = 0.5,
                              override.aes = list(size = 1))) +
  # element_markdown renders the *k* italics in the y-axis title
  theme(axis.title.y = ggtext::element_markdown(),
        legend.position = "bottom",
        legend.direction = "horizontal") +
  labs(x = "Temperature (°C)",
       y = "von Bertalanffy growth coefficient (*k*)") + 
  NULL
group_by: one grouping variable (area)
ungroup: no grouping variables
mutate: new variable 'bk' (double) with one unique value and 0% NA
        new variable 'tref' (double) with one unique value and 0% NA
ungroup: no grouping variables

# Save the combined global + area-specific figure (talk version kept disabled)
ggsave(paste0(home, "/figures/sharpe_school_bayes.pdf"), width = 17, height = 17, units = "cm")
#ggsave(paste0(home, "/figures/for-talks/sharpe_school_bayes.pdf"), width = 14, height = 14, units = "cm")

Area-specific T_opts as a ridgeplot

# Calculate T_opt by area
# Area x temperature prediction grid; re_formula = NULL keeps the area-level
# random effects so predictions are site-specific
tsa <- dat %>% 
  data_grid(area = unique(dat$area),
            temp = seq_range(temp, n = 100)) %>% 
  mutate(bk = 8.62e-05,
         tref = 8 + 273.15) %>% 
  add_epred_draws(fitbs, re_formula = NULL, ndraws = 10000) 
mutate: new variable 'bk' (double) with one unique value and 0% NA
        new variable 'tref' (double) with one unique value and 0% NA
# Per-draw, per-area optimum: the grid temperature that maximises the
# predicted rate, discarding draws whose maximum sits on a grid endpoint
# (those have no interior optimum)
t_opt_a <- tsa %>% 
  group_by(.draw, area) %>% 
  filter(.epred == max(.epred)) %>% 
  ungroup() %>% 
  filter(temp != min(temp)) %>% 
  filter(temp != max(temp))
group_by: 2 grouping variables (.draw, area)
filter (grouped): removed 9,900,000 rows (99%), 100,000 rows remaining
ungroup: no grouping variables
filter: removed 4,362 rows (4%), 95,638 rows remaining
filter: removed 8,688 rows (9%), 86,950 rows remaining
# Site-level summary: median optimum temperature with an 80% interval
# (10th-90th percentiles); used for the ridge-plot error bars below
t_opt_a_sum <- t_opt_a %>% 
  group_by(area) %>% 
  summarise(median = median(temp),
            lwr = quantile(temp, probs = 0.1),
            upr = quantile(temp, probs = 0.9))
group_by: one grouping variable (area)
summarise: now 10 rows and 4 columns, ungrouped
# How many datapoints are below or above optimum by area and globally?
# Number of observations below the site-specific median optimum temperature
left_join(dat, t_opt_a_sum, by = "area") %>% 
  filter(temp < median) %>% 
  nrow()
left_join: added 3 columns (median, lwr, upr)
           > rows only in x     0
           > rows only in y  (  0)
           > matched rows     306
           >                 =====
           > rows total       306
filter: removed 117 rows (38%), 189 rows remaining
[1] 189
# Proportion of observations above the site-specific median optimum.
# Fix: values_fill = 0 gives one-sided sites a count of 0 instead of NA, so
# prop is 0 or 1 there rather than NA (previously HO/MU/RA/SI_HA showed NA).
left_join(dat, t_opt_a_sum, by = "area") %>% 
  mutate(above = ifelse(temp > median, "Y", "N")) %>% 
  group_by(area) %>% 
  count(above) %>% 
  pivot_wider(names_from = above, values_from = n, values_fill = 0) %>% 
  mutate(prop = Y/(Y+N)) %>% 
  arrange(desc(prop))
left_join: added 3 columns (median, lwr, upr)
           > rows only in x     0
           > rows only in y  (  0)
           > matched rows     306
           >                 =====
           > rows total       306
mutate: new variable 'above' (character) with 2 unique values and 0% NA
group_by: one grouping variable (area)
count: now 16 rows and 3 columns, one group variable remaining (area)
pivot_wider: reorganized (above, n) into (N, Y) [was 16x3, now 10x3]
mutate (grouped): new variable 'prop' (double) with 7 unique values and 40% NA
# A tibble: 10 × 4
# Groups:   area [10]
   area      N     Y    prop
   <chr> <int> <int>   <dbl>
 1 FM       10    27  0.730 
 2 BT        7    15  0.682 
 3 SI_EK    14    10  0.417 
 4 JM       37    23  0.383 
 5 FB       44     3  0.0638
 6 BS       16     1  0.0588
 7 HO       29    NA NA     
 8 MU       18    NA NA     
 9 RA       14    NA NA     
10 SI_HA    NA    38 NA     
# Without impacted?
# Same below-optimum count, excluding the two impacted sites (BT, SI_EK)
left_join(dat, t_opt_a_sum, by = "area") %>% 
  filter(!area %in% c("BT", "SI_EK")) %>% 
  filter(temp < median) %>% 
  nrow()
left_join: added 3 columns (median, lwr, upr)
           > rows only in x     0
           > rows only in y  (  0)
           > matched rows     306
           >                 =====
           > rows total       306
filter: removed 46 rows (15%), 260 rows remaining
filter: removed 92 rows (35%), 168 rows remaining
[1] 168
# Sample size per site, largest first
dat %>% 
  count(area, sort = TRUE)
group_by: one grouping variable (area)
summarise: now 10 rows and 2 columns, ungrouped
# A tibble: 10 × 2
   area      n
   <chr> <int>
 1 JM       60
 2 FB       47
 3 SI_HA    38
 4 FM       37
 5 HO       29
 6 SI_EK    24
 7 BT       22
 8 MU       18
 9 BS       17
10 RA       14
# nrow(filter(dat, temp < 9.468607))
# nrow(dat)

# Distribution of global optimum-temperature draws, one bin per grid value
ggplot(t_opt, aes(x = temp)) +
  geom_histogram(bins = length(unique(t_opt$temp)))

# Compute quantiles across the spaghetti (per-draw) curves: shaded rectangles
# for the 90% (5th-95th) and 80% (10th-90th) intervals of the global optimum
rect_dat <- data.frame(xmin = c(quantile(t_opt$temp, probs = c(0.05)), quantile(t_opt$temp, probs = c(0.1))),
                       xmax = c(quantile(t_opt$temp, probs = c(0.95)), quantile(t_opt$temp, probs = c(0.9))),
                       ymin = c(-Inf, -Inf),
                       ymax = c(Inf, Inf),
                       interval = c("90%", "80%"))

# Ridge plot: per-site distribution of optimum temperature, drawn over the
# global median (dashed vertical line) and its 90%/80% intervals (grey boxes)
ridge <- ggplot(t_opt_a, aes(temp, factor(area, levels = rev(order$area)), fill = factor(area, levels = rev(order$area)))) + 
  geom_vline(xintercept = quantile(t_opt$temp, probs = c(0.5)), alpha = 0.6, linetype = 2, linewidth = 0.75) +
  # Background interval rectangles (wider 90% band lighter, 80% band solid)
  geom_rect(data = rect_dat %>% filter(interval == "90%"), aes(xmin = xmin, xmax = xmax), ymin = -Inf, ymax = Inf, 
            inherit.aes = FALSE, fill = "grey", alpha = 0.35) +
  geom_rect(data = rect_dat %>% filter(interval == "80%"), aes(xmin = xmin, xmax = xmax), ymin = -Inf, ymax = Inf, 
            inherit.aes = FALSE, fill = "grey", alpha = 1) +
  # Histogram-style ridges, one per site
  geom_density_ridges(alpha = 0.7, color = NA, scale = 1,
                      stat = "binline", bins = 50) +
  scale_fill_manual(values = rev(colors), name = "Site") +
  scale_color_manual(values = rev(colors), name = "Site") +
  labs(x = "Temperature (°C)", y = "") +
  guides(fill = "none") +
  # Site-specific 80% interval (from t_opt_a_sum) and median point on top
  geom_errorbar(data = t_opt_a_sum, aes(xmin = lwr, xmax = upr, y = area, color = factor(area, levels = rev(order$area))), linewidth = 1,
                inherit.aes = FALSE, width = 0) +
  geom_point(data = t_opt_a_sum, aes(median, area, fill = factor(area, levels = rev(order$area))), shape = 21, color = "white", size = 3) +
  NULL
filter: removed one row (50%), one row remaining
filter: removed one row (50%), one row remaining
# NOTE(review): this saves the ridge panel alone; the same file path is
# written again below with the combined two-panel figure, overwriting this
# output — confirm whether this intermediate save is still needed
ggsave(paste0(home, "/figures/t_opt_ridges.pdf" ), width = 17, height = 17, units = "cm")

# Any patterns in the residuals (area spec and global)
# Global median optimum, recycled across all sites
t_opt_a_sum$global <- quantile(t_opt$temp, probs = c(0.5))

# Each site's deviation from the global optimum
t_opt_a_sum$diff <- t_opt_a_sum$median - t_opt_a_sum$global

# Site ordering/annotation by mean observed temperature (warmest first);
# VBG is the back-calculated growth data prepared earlier in this document
order2 <- VBG %>%
  ungroup() %>%
  group_by(area) %>%
  summarise(mean_temp = mean(temp)) %>%
  arrange(desc(mean_temp))
ungroup: no grouping variables
group_by: one grouping variable (area)
summarise: now 10 rows and 2 columns, ungrouped
# Attach mean site temperature; join key made explicit (consistent with the
# other joins in this script, and avoids the implicit-key message)
t_opt_a_sum <- left_join(t_opt_a_sum, order2, by = "area")
Joining with `by = join_by(area)`
left_join: added one column (mean_temp)
           > rows only in x    0
           > rows only in y  ( 0)
           > matched rows     10
           >                 ====
           > rows total       10
# Deviation of each site's optimum from the global optimum, plotted against
# the site's mean temperature (error bars = site 80% interval minus global)
diff <- ggplot(t_opt_a_sum, aes(mean_temp, diff, label = area)) +
  geom_point(color = "gray10") + 
  geom_errorbar(aes(ymin = lwr-global, ymax = upr-global), width = 0, alpha = 0.4) +
  labs(x = "Mean site temperature (°C)", y = "Site-specific - Global optimum") +
  geom_hline(yintercept = 0, alpha = 0.5, color = "gray30", linetype = 2) +
  NULL
  
# Combine the (flipped) ridge plot with the deviation panel via patchwork
(ridge & coord_flip() & guides(color = "none")) + diff + plot_layout(widths = c(1, 0.6)) + plot_annotation(tag_levels = "A")

ggsave(paste0(home, "/figures/t_opt_ridges.pdf" ), width = 17, height = 8, units = "cm", device = cairo_pdf)

Plot area specific predictions

# Compare with the area-specific Sharpe-Schoolfield fit (fitbs): predictions
# on a per-site temperature grid, including site-level random effects
area_pred_brms <- dat %>%
  group_by(area) %>% 
  data_grid(temp = seq_range(temp, n = 100)) %>% 
  ungroup() %>% 
  mutate(bk = 8.62e-05,
         tref = 8 + 273.15) %>% 
  add_epred_draws(fitbs)
group_by: one grouping variable (area)
ungroup: no grouping variables
mutate: new variable 'bk' (double) with one unique value and 0% NA
        new variable 'tref' (double) with one unique value and 0% NA
# Plot area specific predictions as facets
# One facet per site: 95% credible ribbon, median line (.width = 0), raw data
p1 <- ggplot(area_pred_brms, aes(temp, y = .epred, color = factor(area, order$area),
                           fill = factor(area, order$area))) +
  stat_lineribbon(.width = c(0.95), alpha = 0.4, color = NA) +
  stat_lineribbon(.width = c(0)) + 
  geom_point(data = dat, aes(temp, rate, color = factor(area, order$area)), size = 0.6) +
  scale_color_manual(values = colors, name = "Site") +
  scale_fill_manual(values = colors, name = "Site") +
  # NOTE(review): no linetype aesthetic is mapped in this plot, so this scale
  # (and labs(linetype = "")) appears inert — confirm intent
  scale_linetype_manual(values = 2) +
  facet_wrap(~factor(area, rev(order$area))) +
  guides(fill = "none",
         color = guide_legend(nrow = 1, reverse = TRUE, title.position = "top", title.hjust = 0.5,
                              override.aes = list(size = 1))) +
  # element_markdown renders the *k* italics in the y-axis title
  theme(axis.title.y = ggtext::element_markdown(),
        legend.position = "bottom",
        legend.direction = "horizontal") +
  labs(x = "Temperature (°C)",
       y = "von Bertalanffy growth coefficient (*k*)",
       linetype = "")

p1

ggsave(paste0(home, "/figures/supp/sharpe_school_bayes_ci_facet.pdf" ), width = 17, height = 17, units = "cm")
ggsave(paste0(home, "/figures/for-talks/sharpe_school_bayes_ci_facet.pdf" ), width = 14, height = 14, units = "cm")